dm 'log;clear;output;clear;';
*ods html close; /* close previous */
*ods html; /* open new */


*********************;
*read in datasets;
*********************;

PROC IMPORT OUT= WORK.workers_producers
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of producers by county.csv"
            DBMS=csv;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_producers_state
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of producers by state.csv"
            DBMS=csv;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_hired
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of hired workers by county.csv"
            DBMS=csv;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_hired_state
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of hired workers by state.csv"
            DBMS=csv;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_migrant
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of migrant workers by county.csv"
            DBMS=csv replace;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_migrant_state
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of migrant workers by state.csv"
            DBMS=csv;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_upaid
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of unpaid workers by county.csv"
            DBMS=csv;
     GETNAMES=YES;

PROC IMPORT OUT= WORK.workers_upaid_state
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\number of unpaid workers by state.csv"
            DBMS=csv;
     GETNAMES=YES;


PROC IMPORT OUT= WORK.population
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\population_2019 by county.csv"
            DBMS=csv;
     GETNAMES=YES;


*these data are from: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv;
PROC IMPORT OUT= WORK.cases
            DATAFILE= "C:\Users\jlusk\Dropbox\Data\MISC\microsoft\deaths_JHU_2021.csv"
            DBMS=csv;
GETNAMES=YES;




*********************************;
*these steps rename variables and datasets;
*********************************;
data workers1;
set workers_producers;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers1=value;

data workers2;
set workers_hired;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers2=value;

data workers3;
set WORK.workers_upaid;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers3=value;

data workers4;
set workers_migrant;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers4=value;

data workers1_s;
set workers_producers_state;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers1_s=value;

data workers2_s;
set workers_hired_state;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers2_s=value;

data workers3_s;
set WORK.workers_upaid_state;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers3_s=value;

data workers4_s;
set workers_migrant_state;
state_name=state;
county_name=county;
drop state;
drop county;
rename state_ANSI=state;
rename county_ANSI=county;
workers4_s=value;

data cases;
set cases;
cases=m3d31y21; 

proc means data=cases sum noprint;
class state;
var cases;
output out=cases_s sum=cases_sum;

data cases_s;
set cases_s (drop=_FREQ_);
if _TYPE_=0 then delete;


*****************************;
*combine datasets;
****************************;

proc sort data=workers1; by state county;
proc sort data=workers2; by state county;
proc sort data=workers3; by state county;
proc sort data=workers4; by state county;
proc sort data=workers1_s; by state ;
proc sort data=workers2_s; by state ;
proc sort data=workers3_s; by state ;
proc sort data=workers4_s; by state ;
proc sort data=population; by state ;
proc sort data=cases; by state county;


data combo;
merge workers1 (keep=state county workers1 state_name county_name) workers2 (keep=state county workers2) workers3 (keep=state county workers3) workers4 (keep=state county workers4) population (keep=state county pop_2017 pop_2010 pop_2019) cases (drop=fips state_name county_name lat long_ combined_key); by state county;
	  
if state=. then delete;
if state=0 then delete;
if county=0 then delete;
if county=. then delete;
if cases=. then cases=0;
if deaths=. then deaths=0;

if workers1=. then workers1=0;
if workers2=. then workers2=0;
if workers3=. then workers3=0;
if workers4=. then workers4=0;
if workers1=-999 then workers1=.;
if workers2=-999 then workers2=.;
if workers3=-999 then workers3=.;
if workers4=-999 then workers4=.;

data combo2;
merge combo workers1_s(keep=state workers1_s) workers2_s(keep=state workers2_s) workers3_s(keep=state workers3_s) workers4_s(keep=state workers4_s) cases_s (keep=state cases_sum); by state;


**********************;
*assigned missing workers and cases to counties based on state totals;
**********************;

proc means data=combo2 sum noprint;
class state;
var workers1-workers4 cases;
output out=hold sum=wsum1-wsum4 cases_sum2;

data hold;
set hold;
if _TYPE_=0 then delete;

data combo3;
merge combo2 hold (drop=_TYPE_ _FREQ_); by state;
missing1=workers1_s-wsum1;
missing2=workers2_s-wsum2;
missing3=workers3_s-wsum3;
missing4=workers4_s-wsum4;
missing_cases=cases_sum-cases_sum2;

proc means data=combo3 sum noprint;
where workers2=.;
class state;
var workers1;
output out=hold2 sum=misssum2;

proc means data=combo3 sum noprint;
where workers3=.;
class state;
var workers1;
output out=hold3 sum=misssum3;

proc means data=combo3 sum noprint;
where workers4=.;
class state;
var workers1;
output out=hold4 sum=misssum4;

proc means data=combo3 sum noprint;
where cases=0;
class state;
var pop_2019;
output out=hold5 sum=misssum5;


*******************;
*perform key calculations to estimate cases;
*******************;

data combo4;
merge combo3 hold2(drop=_TYPE_ _FREQ_) hold3(drop=_TYPE_ _FREQ_) hold4(drop=_TYPE_ _FREQ_) hold5(drop=_TYPE_ _FREQ_); by state;
if state=. then delete;
if county=. then delete;
if county=0 then delete;

*for counties with missing workers (due to disclosure), assign them their worker1 share of the missing workers at the state level;
if workers2=. then workers2=missing2*(workers1/misssum2);
if workers3=. then workers3=missing3*(workers1/misssum3);
if workers4=. then workers4=missing4*(workers1/misssum4);

if state=49 and cases=0 then cases=missing_cases*(pop_2019/misssum5);

cases_w1=cases*(workers1/pop_2017);
cases_w2=cases*(workers2/pop_2017)*(1/1.32);
cases_w3=cases*(workers3/pop_2017);
cases_w4=cases*(workers4/pop_2017)*(1/1.32);

/*
deaths_w1=deaths*(workers1/pop_2017);
deaths_w2=deaths*(workers2/pop_2017);
deaths_w3=deaths*(workers3/pop_2017);
deaths_w4=deaths*(workers4/pop_2017);
*/;


rate=cases/pop_2019;
*density=workers1/pop_2017;



array datey(396) 
m3d1y20	m3d2y20	m3d3y20	m3d4y20	m3d5y20	m3d6y20	m3d7y20	m3d8y20	m3d9y20	m3d10y20	m3d11y20	m3d12y20	m3d13y20	m3d14y20	m3d15y20	m3d16y20	m3d17y20	m3d18y20	m3d19y20	m3d20y20	m3d21y20	m3d22y20	m3d23y20	m3d24y20	m3d25y20	m3d26y20	m3d27y20	m3d28y20	m3d29y20	m3d30y20	m3d31y20
m4d1y20	m4d2y20	m4d3y20	m4d4y20	m4d5y20	m4d6y20	m4d7y20	m4d8y20	m4d9y20	m4d10y20	m4d11y20	m4d12y20	m4d13y20	m4d14y20	m4d15y20	m4d16y20	m4d17y20	m4d18y20	m4d19y20	m4d20y20	m4d21y20	m4d22y20	m4d23y20	m4d24y20	m4d25y20	m4d26y20	m4d27y20	m4d28y20	m4d29y20	m4d30y20	
m5d1y20	m5d2y20	m5d3y20	m5d4y20	m5d5y20	m5d6y20	m5d7y20	m5d8y20	m5d9y20	m5d10y20	m5d11y20	m5d12y20	m5d13y20	m5d14y20	m5d15y20	m5d16y20	m5d17y20	m5d18y20	m5d19y20	m5d20y20	m5d21y20	m5d22y20	m5d23y20	m5d24y20	m5d25y20	m5d26y20	m5d27y20	m5d28y20	m5d29y20	m5d30y20	m5d31y20
m6d1y20	m6d2y20	m6d3y20	m6d4y20	m6d5y20	m6d6y20	m6d7y20	m6d8y20	m6d9y20	m6d10y20	m6d11y20	m6d12y20	m6d13y20	m6d14y20	m6d15y20	m6d16y20	m6d17y20	m6d18y20	m6d19y20	m6d20y20	m6d21y20	m6d22y20	m6d23y20	m6d24y20	m6d25y20	m6d26y20	m6d27y20	m6d28y20	m6d29y20	m6d30y20	
m7d1y20	m7d2y20	m7d3y20	m7d4y20	m7d5y20	m7d6y20	m7d7y20	m7d8y20	m7d9y20	m7d10y20	m7d11y20	m7d12y20	m7d13y20	m7d14y20	m7d15y20	m7d16y20	m7d17y20	m7d18y20	m7d19y20	m7d20y20	m7d21y20	m7d22y20	m7d23y20	m7d24y20	m7d25y20	m7d26y20	m7d27y20	m7d28y20	m7d29y20	m7d30y20	m7d31y20
m8d1y20	m8d2y20	m8d3y20	m8d4y20	m8d5y20	m8d6y20	m8d7y20	m8d8y20	m8d9y20	m8d10y20	m8d11y20	m8d12y20	m8d13y20	m8d14y20	m8d15y20	m8d16y20	m8d17y20	m8d18y20	m8d19y20	m8d20y20	m8d21y20	m8d22y20	m8d23y20	m8d24y20	m8d25y20	m8d26y20	m8d27y20	m8d28y20	m8d29y20	m8d30y20	m8d31y20
m9d1y20	m9d2y20	m9d3y20	m9d4y20	m9d5y20	m9d6y20	m9d7y20	m9d8y20	m9d9y20	m9d10y20	m9d11y20	m9d12y20	m9d13y20	m9d14y20	m9d15y20	m9d16y20	m9d17y20	m9d18y20	m9d19y20	m9d20y20	m9d21y20	m9d22y20	m9d23y20	m9d24y20	m9d25y20	m9d26y20	m9d27y20	m9d28y20	m9d29y20	m9d30y20	
m10d1y20	m10d2y20	m10d3y20	m10d4y20	m10d5y20	m10d6y20	m10d7y20	m10d8y20	m10d9y20	m10d10y20	m10d11y20	m10d12y20	m10d13y20	m10d14y20	m10d15y20	m10d16y20	m10d17y20	m10d18y20	m10d19y20	m10d20y20	m10d21y20	m10d22y20	m10d23y20	m10d24y20	m10d25y20	m10d26y20	m10d27y20	m10d28y20	m10d29y20	m10d30y20	m10d31y20
m11d1y20	m11d2y20	m11d3y20	m11d4y20	m11d5y20	m11d6y20	m11d7y20	m11d8y20	m11d9y20	m11d10y20	m11d11y20	m11d12y20	m11d13y20	m11d14y20	m11d15y20	m11d16y20	m11d17y20	m11d18y20	m11d19y20	m11d20y20	m11d21y20	m11d22y20	m11d23y20	m11d24y20	m11d25y20	m11d26y20	m11d27y20	m11d28y20	m11d29y20	m11d30y20	
m12d1y20	m12d2y20	m12d3y20	m12d4y20	m12d5y20	m12d6y20	m12d7y20	m12d8y20	m12d9y20	m12d10y20	m12d11y20	m12d12y20	m12d13y20	m12d14y20	m12d15y20	m12d16y20	m12d17y20	m12d18y20	m12d19y20	m12d20y20	m12d21y20	m12d22y20	m12d23y20	m12d24y20	m12d25y20	m12d26y20	m12d27y20	m12d28y20	m12d29y20	m12d30y20	m12d31y20
m1d1y21	m1d2y21	m1d3y21	m1d4y21	m1d5y21	m1d6y21	m1d7y21	m1d8y21	m1d9y21	m1d10y21	m1d11y21	m1d12y21	m1d13y21	m1d14y21	m1d15y21	m1d16y21	m1d17y21	m1d18y21	m1d19y21	m1d20y21	m1d21y21	m1d22y21	m1d23y21	m1d24y21	m1d25y21	m1d26y21	m1d27y21	m1d28y21	m1d29y21	m1d30y21	m1d31y21
m2d1y21	m2d2y21	m2d3y21	m2d4y21	m2d5y21	m2d6y21	m2d7y21	m2d8y21	m2d9y21	m2d10y21	m2d11y21	m2d12y21	m2d13y21	m2d14y21	m2d15y21	m2d16y21	m2d17y21	m2d18y21	m2d19y21	m2d20y21	m2d21y21	m2d22y21	m2d23y21	m2d24y21	m2d25y21	m2d26y21	m2d27y21	m2d28y21			
m3d1y21	m3d2y21	m3d3y21	m3d4y21	m3d5y21	m3d6y21	m3d7y21	m3d8y21	m3d9y21	m3d10y21	m3d11y21	m3d12y21	m3d13y21	m3d14y21	m3d15y21	m3d16y21	m3d17y21	m3d18y21	m3d19y21	m3d20y21	m3d21y21	m3d22y21	m3d23y21	m3d24y21	m3d25y21	m3d26y21	m3d27y21	m3d28y21	m3d29y21	m3d30y21	m3d31y21;


array cases_w1_(396) cases_w1_1-cases_w1_396;
array cases_w2_(396) cases_w2_1-cases_w2_396;
array cases_w3_(396) cases_w3_1-cases_w3_396;
array cases_w4_(396) cases_w4_1-cases_w4_396;
array all_cases(396) all_cases1-all_cases396;

do j=1 to 396;
cases_w1_(j)=datey(j)*(workers1/pop_2017); 
cases_w2_(j)=datey(j)*(workers2/pop_2017); 
cases_w3_(j)=datey(j)*(workers3/pop_2017); 
cases_w4_(j)=datey(j)*(workers4/pop_2017); 
all_cases(j)=datey(j);
end;

lw1=log(workers1+1);
lw2=log(workers2+1);
lw3=log(workers3+1);
lw4=log(workers4+1);
lrate=log(rate+0.00001);


*regression results relating case rates to ag workers;
proc glm data=combo4;
class state;
model lrate=state lw1/solution;
proc glm data=combo4;
class state;
model lrate=state lw2/solution;
proc glm data=combo4;
class state;
model lrate=state lw3/solution;
proc glm data=combo4;
class state;
model lrate=state lw4/solution;


*provides total estimated cases;
proc means data=combo4 sum;
var cases_w1-cases_w4;

*location of highest # of cases;
proc sort data=combo4; by descending cases_w1;
proc print data=combo4 (obs=10);
var state_name county_name cases_w1;

*provides total estimated cases by day;
proc means data=combo4 sum;
var cases_w1_1-cases_w1_396 cases_w2_1-cases_w2_396 cases_w3_1-cases_w3_396 cases_w4_1-cases_w4_396 all_cases1-all_cases396;


run;quit;
